
pacman::p_load(
  tidyverse,
  lubridate,
  fs,
  fst,
  assertthat,
  tictoc,
  kableExtra
)

v_all_qtr <- seq.Date(as.Date(V_TRAINING_START_QTR[1]), as.Date(V_TEST_END_QTR[8]), by = "quarter")

min_year_ <- min(v_all_qtr) %>% 
  year()

max_year_ <- max(v_all_qtr) %>% 
  year()


file_info <- tibble(file_name = dir_ls("../../data_qtr_rand_no_cid_with_outcome/")) %>% 
  mutate(
    qtr = ymd(str_extract(file_name, "[0-9]{8}")),
    rand_no_cid = str_extract(file_name, "[0-9]{4}(?=.fst)")
  )

df_files <- file_info %>% 
  filter(qtr %in% v_all_qtr, rand_no_cid %in% RAND_NO_CID)

are_equal(unique(df_files$rand_no_cid), RAND_NO_CID) %>% 
  stopifnot("Desired training IDs not present in pulled files" = .)

tic(msg = "Loading Files")
df_raw <- map_dfr(df_files$file_name, function(x) {
  print(x) 
  read_fst(x, columns = c("cid", "qtr", "state", "county_code", 
                          "census_tract", "t_default", "consumer_category")) %>% 
    mutate(
      rand_no_cid = str_extract(x, "[0-9]{4}(?=.fst)"),
      Year = year(qtr)
    ) 
})
toc()

df_fips <- read_csv("../../data/fips.csv") %>% 
  distinct(state, state_code)

Load_CRA <- function(data_year) {
  
  path_CRA <- paste0("../../data/CRA/final/", data_year, ".csv")
  df <- read_csv(path_CRA, col_types = "ddcddc") 
  
  df %>% 
    filter(!is.na(Income_Level)) %>% 
    left_join(., df_fips, by = c("State" = "state_code"))  %>%
    select(-State)
  
}

df_CRA <- map_dfr(min_year_:max_year_, ~ Load_CRA(.)) %>% 
  mutate(
    across(-state,  ~ as.double(.)),
    County = as.character(County),
    Tract = as.character(Tract)
  )

df_merged <- left_join(df_raw, df_CRA, by = c("Year", 
                                              "county_code" = "County",
                                              "state",
                                              "census_tract" = "Tract")) %>% 
  mutate(
    d_Income_Level = case_when(Income_Level %in% 1:2 ~ 0,
                               Income_Level %in% 3:4 ~ 1,
                               TRUE ~ NA_real_)
  )


# CC/CD/CF ----------------------------------------------------------------

Calculate_Group_Defaults <- function(merged_data_frame, grouping_variable) {
  
  df_filtered <- merged_data_frame %>% 
    filter(!is.na(.data[[grouping_variable]])) 
  
  total_obs <- nrow(df_filtered)
  
  df_filtered %>% 
    group_by(.data[[grouping_variable]]) %>% 
    summarize(
      "\\% of Observations" = 100 * (n() / total_obs),
      "\\% Default" = 100 * mean(t_default)
    ) %>% 
    rename(
      "Consumer Group" = grouping_variable
    )
  
  
}

df_CC_CD_CF <- Calculate_Group_Defaults(df_merged, "consumer_category") %>% 
  mutate(
    `Consumer Group` = case_when(
      `Consumer Group` == "CC" ~ "Current",
      `Consumer Group` == "CD" ~ "$<$90 Days Past Due",
      `Consumer Group` == "CF" ~ "$\\geq$90 Days Past Due",
      TRUE ~ `Consumer Group`
    )
  )

df_Majority_Minority <- Calculate_Group_Defaults(df_merged, "d_Income_Level") %>% 
  arrange(desc(`Consumer Group`)) %>% 
  mutate(
    `Consumer Group` = if_else(`Consumer Group` == 1, "Non-LMI", "LMI")
  ) 

total_obs_both <- df_merged %>% 
  filter(!is.na(d_Income_Level),
         !is.na(consumer_category)) %>% 
  nrow()

df_overall_default_both <- df_merged %>% 
  filter(!is.na(d_Income_Level),
         !is.na(consumer_category)) %>% 
  summarize(
    "\\% of Observations" = 100 * (n() / total_obs_both),
    "\\% Default" = 100 * mean(t_default)
  ) %>% 
  mutate(
    "Consumer Group" = "Overall"
  ) %>% 
  select(`Consumer Group`, "\\% of Observations", "\\% Default")

df_group_default_both <- df_merged %>% 
  filter(!is.na(d_Income_Level),
         !is.na(consumer_category)) %>% 
  group_by(d_Income_Level, consumer_category) %>% 
  summarize(
    "\\% of Observations" = 100 * (n() / total_obs_both),
    "\\% Default" = 100 * mean(t_default)
  ) %>% 
  ungroup() %>% 
  mutate(
    "Consumer Group" = case_when(
      d_Income_Level == 0 & consumer_category == "CC" ~ "LMI \\& Current",
      d_Income_Level == 0 & consumer_category == "CD" ~ "LMI \\& $<$90 Days Past Due",
      d_Income_Level == 0 & consumer_category == "CF" ~ "LMI \\& $\\geq$90 Days Past Due",
      d_Income_Level == 1 & consumer_category == "CC" ~ "Non-LMI \\& Current",
      d_Income_Level == 1 & consumer_category == "CD" ~ "Non-LMI \\& $<$90 Days Past Due",
      d_Income_Level == 1 & consumer_category == "CF" ~ "Non-LMI \\& $\\geq$90 Days Past Due"
    )
  ) %>% 
  arrange(desc(d_Income_Level)) %>% 
  select("Consumer Group", "\\% of Observations", "\\% Default")

df_overall_both <- bind_rows(
  df_overall_default_both,
  df_Majority_Minority,
  df_CC_CD_CF,
  df_group_default_both
) %>% 
  rename(
    "\\% Default (Next 2 Y.)" = "\\% Default"
  )

kable(df_overall_both, format = "latex", booktabs = TRUE, escape = FALSE, digits = 1, linesep = "") %>% 
  row_spec(., c(1, 3, 6), hline_after = TRUE) %>% 
  pack_rows("Census Tract Status", 2, 3) %>%
  pack_rows("Current Delinquency Status", 4, 6) %>%
  pack_rows("Census Tract Status \\&\n Current Delinquency Status", 7, 12) %>%
  save_kable(., paste0("../../data/pipeline_outputs/", SPECIAL_SUFFIX, "/", "tables/", "cra_consumer_sectioned.tex"))
